/*
Date: October 2025
Project: Income and Child Maltreatment: Evidence from a Discontinuity in Tax Benefits
Author: Katherine Rittenhouse
Purpose: This file uses relationships from ACS to predict household income and the dollar value of a december birth for each birth record.
Files in: predictorsplusoutcomeacs1yr.dta; Births_fkconsistent_deaths; 2010_Census_Tract_to_2010_PUMA; CPUMA0010_PUMA2010_components; 
Files out: pumacw; samplepluspredictions
*/

* ---------------------------------------------------------------------------
* Stage 1: fit the prediction models on the ACS extract.
* Two weighted OLS models are estimated and kept in memory under the names
* "value" (outcome: dollarvalue) and "aftertax" (outcome: aftertax) so they
* can be restored later and applied to the birth records.
* ---------------------------------------------------------------------------
clear all
set more off

* The fully interacted factor specification below generates a very large
* number of terms, hence the raised variable limit.
set maxvar 120000
use "predictorsplusoutcomeacs1yr.dta", clear

* Predictors shared by both models; every predictor enters as a factor
* variable and is interacted with every other predictor.
local fcontrols "meduc feduc childorder momrace dadrace momagebin dadagebin recentered_yr cpuma0010"
local ctrls "i.(`fcontrols')##i.(`fcontrols')"

* Top-code child order at 3. Stata treats numeric missing as larger than any
* number, so without the !missing() guard every observation with unknown
* child order would silently be recoded to 3 and enter the regressions.
replace childorder = 3 if childorder > 3 & !missing(childorder)

* Weighted by hhwt (presumably the ACS household weight -- confirm).
quietly regress dollarvalue `ctrls' [pweight=hhwt]
estimates store value

quietly regress aftertax `ctrls' [pweight=hhwt]
estimates store aftertax

* ---------------------------------------------------------------------------
* Stage 2a: build the California census-tract -> 2010 PUMA crosswalk.
* Output: pumacw.dta with one row per tract (censustract, puma).
* ---------------------------------------------------------------------------
import delim "2010_Census_Tract_to_2010_PUMA.txt",delim(comma) clear
keep if state==6

* Assemble the 11-character tract identifier: 2-digit state + 3-digit county
* + 6-digit tract, each zero-padded on the left by the numeric format.
* A missing component renders as "." and is NOT padded, so it shows up as a
* short identifier in the length tabulation below instead of being hidden.
gen censustract = string(state, "%02.0f") + string(county, "%03.0f") + string(tract, "%06.0f")

* Sanity check: every identifier should be 11 characters long.
gen length = strlen(censustract)
tab length

keep censustract puma
save "pumacw.dta",replace

* ---------------------------------------------------------------------------
* Stage 2b: attach the consistent-PUMA code (cpuma) to each California tract.
* Overwrites pumacw.dta, which afterwards is keyed by censustract.
* ---------------------------------------------------------------------------
import excel "CPUMA0010_PUMA2010_components.xls",firstrow clear
ren PUMA puma
destring puma,force replace

* PUMA codes are only unique within a state, so restrict to California
* before using puma as the merge key.
keep if State_Name=="California"
merge 1:m puma using "pumacw.dta"

* Rows found only in the CPUMA list have no tract attached (censustract is
* empty). Keeping them could make censustract non-unique for the later
* m:1 merge, or let birth records with a blank tract pick up an arbitrary
* cpuma, so they are removed. Tracts without a CPUMA match (_merge==2) are
* kept, with cpuma missing.
drop if _merge == 1

drop _merge State_Name State_F GEO GIS
ren CPUMA cpuma
save "pumacw.dta",replace

* ---------------------------------------------------------------------------
* Stage 3: prepare the birth records and apply the stored ACS models.
* Output: samplepluspredictions.dta with value_hat and aftertax_hat added.
* ---------------------------------------------------------------------------
use  "Births_fkconsistent_deaths",clear

* Attach cpuma via the tract crosswalk; crosswalk tracts with no births are
* discarded, unmatched births are kept (with cpuma missing).
merge m:1 censustract using "pumacw.dta"
drop if _merge==2

* Harmonise variable names with the predictors used in the ACS models.
ren (momeduc popeduc) (meduc feduc)
ren cpuma cpuma0010

* Top-code child order at 3, as in the estimation sample. The !missing()
* guard is required because Stata ranks numeric missing above every number;
* without it, births with unknown child order would be recoded to 3.
replace childorder = 3 if childorder > 3 & !missing(childorder)

* Nearest January 1 cutoff: births in January-June are centered on Jan 1 of
* their own year, births in July-December on Jan 1 of the following year.
* Births with a missing month cannot be placed on either side of the cutoff
* and are left with a missing cutoff date (and hence missing recentered_yr,
* et, and predictions) rather than being pushed into the following year.
gen jan1date = mdy(1,1,birth_y) if birth_m <= 6
replace jan1date = mdy(1,1,birth_y+1) if birth_m > 6 & !missing(birth_m)
gen recentered_yr = year(jan1date)

* Distance of the birth date from the cutoff (assumes bdate_mdy is a Stata
* daily date, so et is in days -- confirm).
gen et = bdate_mdy - jan1date

* Drop records without a birth year and the half-years at either end of the
* sample window (Jan-Jun 1999 and Jul-Dec 2019).
drop if birth_y==.
drop if birth_y==1999 & birth_m<=6
drop if birth_y==2019&birth_m>6

* Keep the original year variable under a new name and define year as the
* calendar year of birth.
ren year year2
gen year = birth_y

* Linear predictions from the models stored in Stage 1.
estimates restore value
predict value_hat

estimates restore aftertax
predict aftertax_hat

save "samplepluspredictions.dta",replace
